{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import re\n",
    "import copy\n",
    "import dtale\n",
    "#pd.set_option(\"Max_columns\", None)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### PITF 2018 data\n",
    "- http://www.systemicpeace.org/inscrdata.html"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Minimum number of consecutive conflict-free years that must precede a\n",
    "# conflict year for that year to count as a new onset.\n",
    "threshold_peace = 2\n",
    "# yr_p: peace threshold as it appears in the output file names.\n",
    "# yr_l: number of years the predictors are lagged behind the onset year\n",
    "#       (also part of the output file names).\n",
    "yr_p, yr_l = threshold_peace, 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def pitf_ongoing_flag(dt_raw, flag_col):\n",
    "    \"\"\"Return the unique country-years of one PITF event file, flagged with 1.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    dt_raw : pd.DataFrame\n",
    "        Raw PITF sheet; must contain the columns COUNTRY and YEAR.\n",
    "    flag_col : str\n",
    "        Name of the indicator column to add (set to 1 on every row).\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pd.DataFrame\n",
    "        Columns COUNTRY (whitespace-stripped), YEAR and `flag_col`,\n",
    "        one row per distinct country-year.\n",
    "    \"\"\"\n",
    "    dt_flag = dt_raw[[\"COUNTRY\", \"YEAR\"]].copy()\n",
    "    # Strip padding so the same country merges on identical keys across files.\n",
    "    dt_flag[\"COUNTRY\"] = dt_flag[\"COUNTRY\"].str.strip()\n",
    "    # A country-year listed more than once in a file would otherwise be\n",
    "    # repeated (and multiplied) by the outer merges below.\n",
    "    dt_flag = dt_flag.drop_duplicates().reset_index(drop=True)\n",
    "    dt_flag[flag_col] = 1\n",
    "    return dt_flag\n",
    "\n",
    "\n",
    "dt_pitf_rc = pd.read_excel(\"data/PITF 2018/PITF Adverse Regime Change 2018.xls\")\n",
    "dt_pitf_ew = pd.read_excel(\"data/PITF 2018/PITF Ethnic War 2018.xls\")\n",
    "dt_pitf_gp = pd.read_excel(\"data/PITF 2018/PITF GenoPoliticide 2018.xls\")\n",
    "dt_pitf_rw = pd.read_excel(\"data/PITF 2018/PITF Revolutionary War 2018.xls\")\n",
    "#\n",
    "dt_pitf_rc_onging = pitf_ongoing_flag(dt_pitf_rc, \"rc_ongoing\")\n",
    "dt_pitf_ew_onging = pitf_ongoing_flag(dt_pitf_ew, \"ew_ongoing\")\n",
    "dt_pitf_gp_onging = pitf_ongoing_flag(dt_pitf_gp, \"gp_ongoing\")\n",
    "dt_pitf_rw_onging = pitf_ongoing_flag(dt_pitf_rw, \"rw_ongoing\")\n",
    "# Outer merges: a country-year is kept if any of the four files covers it.\n",
    "dt_pitf_consolidate = dt_pitf_rc_onging\n",
    "for dt_flag in (dt_pitf_ew_onging, dt_pitf_gp_onging, dt_pitf_rw_onging):\n",
    "    dt_pitf_consolidate = pd.merge(left=dt_pitf_consolidate, right=dt_flag, on=[\"COUNTRY\", \"YEAR\"], how=\"outer\")\n",
    "# A flag missing after the merge means that event type is absent in that year.\n",
    "dt_pitf_consolidate = dt_pitf_consolidate.fillna(0)\n",
    "dt_pitf_consolidate.columns = [i.lower() for i in dt_pitf_consolidate.columns]\n",
    "dt_pitf_consolidate.to_excel(\"data/PITF 2018/PITF Consolidate 2018.xlsx\", index=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Balanced country x year panel: every PITF country for every year from ten\n",
    "# years before the first recorded year through the last recorded year.\n",
    "year_pitf = list(range(dt_pitf_consolidate.year.min()-10, dt_pitf_consolidate.year.max()+1))\n",
    "pitf_countries = dt_pitf_consolidate.country.unique()\n",
    "c_list = [c for c in pitf_countries for _ in year_pitf]\n",
    "y_list = [y for _ in pitf_countries for y in year_pitf]\n",
    "# Country-years without a PITF record get 0 for every flag.\n",
    "dt_pitf_consolidate_long = (\n",
    "    pd.DataFrame({\"country\":c_list, \"year\":y_list})\n",
    "    .merge(dt_pitf_consolidate, on=[\"country\", \"year\"], how=\"left\")\n",
    "    .fillna(0)\n",
    ")\n",
    "# conflict_ongoing = number of event types present in that country-year.\n",
    "event_cols = [\"rc_ongoing\", \"ew_ongoing\", \"gp_ongoing\", \"rw_ongoing\"]\n",
    "dt_pitf_consolidate_long[\"conflict_ongoing\"] = dt_pitf_consolidate_long[event_cols].sum(axis=1)\n",
    "# Everything after country/year is a count; store it as integers.\n",
    "dt_pitf_consolidate_long = dt_pitf_consolidate_long.astype(\n",
    "    {col: \"int\" for col in dt_pitf_consolidate_long.columns[2:]}\n",
    ")\n",
    "pitf_country_year = dt_pitf_consolidate_long.country + \"_\" + dt_pitf_consolidate_long.year.astype(str)\n",
    "dt_pitf_consolidate_long.insert(2, \"country_year\", pitf_country_year)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# PITF onsets: a conflict year that follows at least `threshold_peace`\n",
    "# consecutive conflict-free years in the same country.\n",
    "# Entries are \"<country>_<year>\" strings, matching the country_year column.\n",
    "onset_pitf = []\n",
    "onset_rconly_pitf = []\n",
    "# Walk each country's years once, in order, instead of re-filtering the whole\n",
    "# frame for every country-year. If a country-year is repeated, its first row\n",
    "# is the one that counts.\n",
    "pitf_panel = (\n",
    "    dt_pitf_consolidate_long\n",
    "    .drop_duplicates(subset=[\"country\", \"year\"])\n",
    "    .sort_values(by=[\"country\", \"year\"])\n",
    ")\n",
    "for c, dt_c in pitf_panel.groupby(\"country\", sort=True):\n",
    "    yrs_peace = 0\n",
    "    for y, conflict_ongoing, rc_ongoing in zip(dt_c.year, dt_c.conflict_ongoing, dt_c.rc_ongoing):\n",
    "        if conflict_ongoing == 0:\n",
    "            yrs_peace += 1\n",
    "            continue\n",
    "        if yrs_peace >= threshold_peace:\n",
    "            onset_pitf.append(f\"{c}_{y}\")\n",
    "            # conflict_ongoing counts the event types present, so a value of 1\n",
    "            # together with rc_ongoing == 1 means regime change only; these\n",
    "            # onsets are collected separately so they can be excluded later.\n",
    "            if conflict_ongoing == rc_ongoing == 1:\n",
    "                onset_rconly_pitf.append(f\"{c}_{y}\")\n",
    "        # Any conflict year restarts the peace count.\n",
    "        yrs_peace = 0"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### PRIO\n",
    "- New DV from PRIO: \n",
    "    - conflict types 3 & 4 (`type_of_conflict`) from the UCDP/PRIO Armed Conflict Dataset version 21.1: https://ucdp.uu.se/downloads/index.html#onesided\n",
    "    - cumulative_intensity needs to reach 1 at some point during a conflict for it to be kept\n",
    "    - the starting year is taken from start_date2\n",
    "    - one unique conflict (identified by conflict_id) can have several episodes, i.e., different start_date2 values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "dt_prio = pd.read_csv(\"data/ucdp-prio-acd-211-csv/ucdp-prio-acd-211.csv\")\n",
    "# Keep records up to 2018 whose type_of_conflict is 3 or 4.\n",
    "in_scope = (dt_prio.year<=2018) & dt_prio.type_of_conflict.isin([3,4])\n",
    "dt_prio = dt_prio[in_scope].reset_index(drop=True)\n",
    "# Keep only conflicts whose cumulative_intensity adds up to at least 1\n",
    "# across all of their rows.\n",
    "intensity_total = dt_prio.groupby(\"conflict_id\").cumulative_intensity.sum()\n",
    "conflictid_cumu = intensity_total[intensity_total>=1].index.tolist()\n",
    "dt_prio = dt_prio[dt_prio.conflict_id.isin(conflictid_cumu)].reset_index(drop=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Bolivia\n",
      "Cameroon\n",
      "Costa Rica\n",
      "Paraguay\n",
      "United States of America\n"
     ]
    }
   ],
   "source": [
    "### concord to PITF data\n",
    "def prio_concord(df):\n",
    "    \"\"\"Rewrite PRIO `location` names to the PITF country names, in place.\n",
    "\n",
    "    `df` needs the columns `location` and `year`. Two locations are split\n",
    "    by year: \"Russia (Soviet Union)\" becomes \"USSR\" through 1991 and\n",
    "    \"Russia\" afterwards; \"Yemen (North Yemen)\" becomes \"Yemen North\"\n",
    "    through 1989 and \"Yemen\" afterwards. The same (modified) frame is\n",
    "    returned.\n",
    "    \"\"\"\n",
    "    # Names that map one-to-one regardless of year.\n",
    "    plain_renames = {\n",
    "        \"Bosnia-Herzegovina\": \"Bosnia\",\n",
    "        \"Cambodia (Kampuchea)\": \"Cambodia\",\n",
    "        \"Congo\": \"Congo-Brazzaville\",\n",
    "        \"DR Congo (Zaire)\": \"Congo-Kinshasa\",\n",
    "        \"Hyderabad\": \"India\",\n",
    "        \"Madagascar (Malagasy)\": \"Madagascar\",\n",
    "        \"Serbia (Yugoslavia)\": \"Yugoslavia\",\n",
    "        \"South Vietnam\": \"Vietnam South\",\n",
    "        \"South Yemen\": \"Yemen South\",\n",
    "        \"Zimbabwe (Rhodesia)\": \"Zimbabwe\",\n",
    "    }\n",
    "    for prio_name, pitf_name in plain_renames.items():\n",
    "        df.loc[df[\"location\"]==prio_name, \"location\"] = pitf_name\n",
    "    # (PRIO name, last year of the early name, early name, later name)\n",
    "    year_splits = [\n",
    "        (\"Russia (Soviet Union)\", 1991, \"USSR\", \"Russia\"),\n",
    "        (\"Yemen (North Yemen)\", 1989, \"Yemen North\", \"Yemen\"),\n",
    "    ]\n",
    "    for prio_name, last_early_year, early_name, later_name in year_splits:\n",
    "        is_prio_name = df[\"location\"]==prio_name\n",
    "        df.loc[is_prio_name & (df[\"year\"]<=last_early_year), \"location\"] = early_name\n",
    "        df.loc[is_prio_name & (df[\"year\"]>last_early_year), \"location\"] = later_name\n",
    "    return df\n",
    "# cases completely not covered in PITF\n",
    "dt_prio = prio_concord(dt_prio)\n",
    "pitf_country_names = dt_pitf_consolidate.country.unique()\n",
    "for c in dt_prio.location.sort_values().unique():\n",
    "    if c in pitf_country_names:\n",
    "        continue\n",
    "    print(c)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "def prio_start_year(start_date):\n",
    "    \"\"\"Return the four-digit year contained in a start_date2 value.\n",
    "\n",
    "    Works wherever the year sits in the string (e.g. year-last or\n",
    "    year-first layouts), instead of assuming it is the last four characters.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    ValueError\n",
    "        If the value contains no stand-alone run of exactly four digits.\n",
    "    \"\"\"\n",
    "    match = re.search(r\"(?<!\\d)\\d{4}(?!\\d)\", str(start_date))\n",
    "    if match is None:\n",
    "        raise ValueError(f\"no four-digit year found in start_date2 value {start_date!r}\")\n",
    "    return int(match.group())\n",
    "\n",
    "\n",
    "# threshold for reaching cumulative intensity\n",
    "dt_prio_cumul = dt_prio[[\"conflict_id\",\"location\", \"year\", \"cumulative_intensity\", \"start_date2\"]].copy()\n",
    "# One row per episode (location, conflict_id, start_date2) with its summed\n",
    "# cumulative_intensity and its last recorded year.\n",
    "prio_episodes = (\n",
    "    dt_prio_cumul\n",
    "    .groupby([\"location\", \"conflict_id\", \"start_date2\"])\n",
    "    .agg({\"cumulative_intensity\": \"sum\", \"year\": \"max\"})\n",
    "    .reset_index()\n",
    ")\n",
    "# Only episodes whose cumulative_intensity sums to more than 0 count as ongoing.\n",
    "prio_episodes = prio_episodes[prio_episodes.cumulative_intensity>0]\n",
    "# Every year from the episode's start year through its last recorded year is\n",
    "# ongoing; the set removes overlaps between episodes in the same location.\n",
    "prio_country_years = set()\n",
    "for c, sd2, y_end in zip(prio_episodes.location, prio_episodes.start_date2, prio_episodes.year):\n",
    "    prio_country_years.update((c, y) for y in range(prio_start_year(sd2), y_end+1))\n",
    "dt_prio_consolidate = pd.DataFrame(sorted(prio_country_years), columns=[\"country\", \"year\"])\n",
    "dt_prio_consolidate[\"prio_ongoing\"] = 1\n",
    "dt_prio_consolidate = dt_prio_consolidate.sort_values(by=[\"country\", \"year\"]).reset_index(drop=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Balanced country x year panel: every PRIO country for every year from ten\n",
    "# years before the first ongoing year through the last ongoing year.\n",
    "year_prio = list(range(dt_prio_consolidate.year.min()-10, dt_prio_consolidate.year.max()+1))\n",
    "prio_countries = dt_prio_consolidate.country.unique()\n",
    "c_list = [c for c in prio_countries for _ in year_prio]\n",
    "y_list = [y for _ in prio_countries for y in year_prio]\n",
    "# Country-years without an ongoing PRIO conflict get prio_ongoing = 0.\n",
    "dt_prio_consolidate_long = (\n",
    "    pd.DataFrame({\"country\":c_list, \"year\":y_list})\n",
    "    .merge(dt_prio_consolidate, on=[\"country\", \"year\"], how=\"left\")\n",
    "    .fillna(0)\n",
    ")\n",
    "# Everything after country/year is an indicator; store it as integers.\n",
    "dt_prio_consolidate_long = dt_prio_consolidate_long.astype(\n",
    "    {col: \"int\" for col in dt_prio_consolidate_long.columns[2:]}\n",
    ")\n",
    "prio_country_year = dt_prio_consolidate_long.country + \"_\" + dt_prio_consolidate_long.year.astype(str)\n",
    "dt_prio_consolidate_long.insert(2, \"country_year\", prio_country_year)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# same threshold as PITF: a PRIO onset is an ongoing year that follows at\n",
    "# least `threshold_peace` consecutive years without an ongoing conflict.\n",
    "# Entries are \"<country>_<year>\" strings, matching the country_year column.\n",
    "onset_prio = []\n",
    "# Walk each country's years once, in order, instead of re-filtering the whole\n",
    "# frame for every country-year. If a country-year is repeated, its first row\n",
    "# is the one that counts.\n",
    "prio_panel = (\n",
    "    dt_prio_consolidate_long\n",
    "    .drop_duplicates(subset=[\"country\", \"year\"])\n",
    "    .sort_values(by=[\"country\", \"year\"])\n",
    ")\n",
    "for c, dt_c in prio_panel.groupby(\"country\", sort=True):\n",
    "    yrs_peace = 0\n",
    "    for y, conflict_ongoing in zip(dt_c.year, dt_c.prio_ongoing):\n",
    "        if conflict_ongoing == 0:\n",
    "            yrs_peace += 1\n",
    "            continue\n",
    "        if yrs_peace >= threshold_peace:\n",
    "            onset_prio.append(f\"{c}_{y}\")\n",
    "        # Any ongoing year restarts the peace count.\n",
    "        yrs_peace = 0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "def peace_years_before(dt_long, flag_col, country, year, first_year=1956):\n",
    "    \"\"\"Count the consecutive conflict-free years immediately before `year`.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    dt_long : pd.DataFrame\n",
    "        Country-year panel with the columns country, year and `flag_col`.\n",
    "    flag_col : str\n",
    "        Column that is greater than 0 in conflict years and 0 otherwise.\n",
    "    country, year\n",
    "        The onset whose preceding peace spell is measured.\n",
    "    first_year : int\n",
    "        Years before this one are ignored, which caps the count.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    int\n",
    "        Number of rows, walking back from `year - 1`, before the first row\n",
    "        with `flag_col` > 0 (or before `first_year` is passed).\n",
    "    \"\"\"\n",
    "    history = dt_long[(dt_long.country==country) & (dt_long.year<year) & (dt_long.year>=first_year)]\n",
    "    history = history.sort_values(by=[\"year\"], ascending=False)\n",
    "    peace_time = 0\n",
    "    for flag in history[flag_col]:\n",
    "        if flag > 0:\n",
    "            break\n",
    "        peace_time += 1\n",
    "    return peace_time\n",
    "\n",
    "\n",
    "# Onsets that are regime change only are left out of the PITF onset set.\n",
    "rconly_onsets = set(onset_rconly_pitf)\n",
    "pitf_onsets_kept = [cy for cy in onset_pitf if cy not in rconly_onsets]\n",
    "dt_pitf_onset = dt_pitf_consolidate_long[dt_pitf_consolidate_long.country_year.isin(pitf_onsets_kept)].reset_index(drop=True)\n",
    "dt_prio_onset = dt_prio_consolidate_long[dt_prio_consolidate_long.country_year.isin(onset_prio)].reset_index(drop=True)\n",
    "#####\n",
    "# Country and year come from their own columns rather than from splitting\n",
    "# country_year on \"_\", which fails for a country name containing \"_\".\n",
    "dt_pitf_onset[\"peace_years\"] = [\n",
    "    peace_years_before(dt_pitf_consolidate_long, \"conflict_ongoing\", c, y)\n",
    "    for c, y in zip(dt_pitf_onset.country, dt_pitf_onset.year)\n",
    "]\n",
    "dt_prio_onset[\"peace_years\"] = [\n",
    "    peace_years_before(dt_prio_consolidate_long, \"prio_ongoing\", c, y)\n",
    "    for c, y in zip(dt_prio_onset.country, dt_prio_onset.year)\n",
    "]\n",
    "####\n",
    "dt_pitf_onset = dt_pitf_onset[[\"country\", \"year\", \"country_year\", \"conflict_ongoing\", \"peace_years\"]].copy()\n",
    "dt_pitf_onset = dt_pitf_onset.rename(columns={\"conflict_ongoing\": \"PITF_onset\"})\n",
    "dt_prio_onset = dt_prio_onset.rename(columns={\"prio_ongoing\": \"PRIO_onset\"})\n",
    "####\n",
    "# Rename countries to the spelling used as the merge key with the\n",
    "# independent-variable file.\n",
    "onset_to_iv = {\n",
    "    \"Congo-Brazzaville\": \"Congo Brazzaville\",\n",
    "    \"Congo-Kinshasa\": \"Congo Kinshasa\",\n",
    "    \"South Sudan\": \"Sudan South\",\n",
    "    \"Sudan-North\": \"Sudan\",\n",
    "    \"United States of America\": \"United States\",\n",
    "}\n",
    "dt_prio_onset[\"country\"] = dt_prio_onset[\"country\"].replace(onset_to_iv)\n",
    "dt_pitf_onset[\"country\"] = dt_pitf_onset[\"country\"].replace(onset_to_iv)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "dt_iv = pd.read_excel(\"data/Minerva_Clustering_20220209_MARupdated_Vdemadded.xlsx\")\n",
    "\n",
    "\n",
    "def _join_lagged_iv(onsets, onset_flag, iv, lag):\n",
    "    \"\"\"Attach the predictors observed `lag` years before each onset.\n",
    "\n",
    "    The onset year is shifted back by `lag` to pick the matching row of\n",
    "    `iv`, then shifted forward again so `year` still holds the onset year.\n",
    "    Every onset is kept (right merge); `country_year_onset` is inserted as\n",
    "    the third column, exact duplicate rows are dropped, and `py_pt8`\n",
    "    = 2 ** (-peace_years / 8) is added, i.e. a weight that halves with\n",
    "    every 8 peace years.\n",
    "    \"\"\"\n",
    "    lagged = onsets.drop(columns=[\"country_year\", onset_flag])\n",
    "    lagged[\"year\"] = lagged.year - lag\n",
    "    joined = pd.merge(left=iv, right=lagged, on =[\"country\", \"year\"], how=\"right\")\n",
    "    joined[\"year\"] = joined.year + lag\n",
    "    joined.insert(2, \"country_year_onset\", joined.country + \"_\" + joined.year.astype(str))\n",
    "    joined = joined.drop_duplicates()\n",
    "    joined[\"py_pt8\"] = 2**(-joined.peace_years/8)\n",
    "    return joined\n",
    "\n",
    "\n",
    "dt_pitf_onset = _join_lagged_iv(dt_pitf_onset, \"PITF_onset\", dt_iv, yr_l)\n",
    "dt_prio_onset = _join_lagged_iv(dt_prio_onset, \"PRIO_onset\", dt_iv, yr_l)\n",
    "##\n",
    "dt_pitf_onset.to_excel(f\"data/PITF_{yr_p}yrpeace_{yr_l}yrlag.xlsx\", index=False)\n",
    "dt_prio_onset.to_excel(f\"data/PRIO_{yr_p}yrpeace_{yr_l}yrlag.xlsx\", index=False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
